* FIG 10: EVENT STUDIES AND DID ESTIMATIONS OF CANTONAL TAX REVENUE PER CAPITA 


/* following packages need to be installed:
ssc install winsor2
ssc install xtvent
ssc install coefplot
*/

version 16.1

clear all
cap log close 
cap clear matrix
set more off
graph drop _all

cap set scheme mygraphs




clear
cap log close 
cap clear matrix
set more off
set scheme myscheme


* ----------------------------------------------------------------------
* Read in the employment data
cd "$mypathRR/Datasets/employment"

* 1) STATENT 
import excel using "STATENT_LOC-2005-2016.xlsx", ///
sheet("alle Rechtsformen") cellrange(A6:W33) firstrow clear
replace kanton = "CH" if kanton == "Total"


* gen correct id-var
gen cant = _n
replace cant = 1 if kanton =="ZH"
replace cant = 2 if kanton =="BE"
replace cant = 3 if kanton =="LU"
replace cant = 4 if kanton =="UR"
replace cant = 5 if kanton =="SZ"
replace cant = 6 if kanton =="OW"
replace cant = 7 if kanton =="NW"
replace cant = 8 if kanton =="GL"
replace cant = 9 if kanton =="ZG"
replace cant = 10 if kanton =="FR"
replace cant = 11 if kanton =="SO"
replace cant = 12 if kanton =="BS"
replace cant = 13 if kanton =="BL"
replace cant = 14 if kanton =="SH"
replace cant = 15 if kanton =="AR"
replace cant = 16 if kanton =="AI"
replace cant = 17 if kanton =="SG"
replace cant = 18 if kanton =="GR"
replace cant = 19 if kanton =="AG"
replace cant = 20 if kanton =="TG"
replace cant = 21 if kanton =="TI"
replace cant = 22 if kanton =="VD"
replace cant = 23 if kanton =="VS"
replace cant = 24 if kanton =="NE"
replace cant = 25 if kanton =="GE"
replace cant = 26 if kanton =="JU"
replace cant = 0 if kanton =="CH"

labmask cant, values(kanton)

* import population stats
forval y = 2005/2016{
preserve
import excel using "STATPOP-ESPOP-population.xlsx", ///
sheet("`y'") cellrange(H8:H34) clear
rename H pop`y'
gen cant = _n-1

tempfile pop`y'
save "`pop`y''"
restore

merge 1:1 cant using "`pop`y''" , nogen
}


* reshape
reshape long Beschäftigte Vollzeitäquivalente Arbeitsstätten pop, j(year) i(cant)
rename Beschäftigte jobs
rename Vollzeitäquivalente FTE
gen Quelle = "STATENT"



* 2) BETRIEBSZÄHLUNG BZ

* import population stats
foreach y in 1995 2001 2005 2008 {
preserve
import excel using "STATPOP-ESPOP-population.xlsx", ///
sheet("`y'") cellrange(H8:H34) clear
rename H pop_
gen cant = _n-1
gen year = `y'

tempfile pop_`y'
save "`pop_`y''"
restore
}

* attach data from Betriebszählung (to show parallel trends pre-treatment)
preserve
import excel using "BZ-Total_VZE.xlsx", ///
cellrange(A3:F111) sheet("data") firstrow clear
drop kanton

foreach y in 2001 1995 2005 2008 {
merge 1:1 cant year using "`pop_`y''" , nogen update
}

gen jobs_pc_bz = jobs/pop_*1000
gen FTE_pc_bz = FTE/pop_*1000
rename jobs jobs_bz
rename FTE FTE_bz
tempfile bz
save "`bz'"
restore

merge 1:1 cant year using "`bz'", update
sort cant year


keep if (year == 1995 | year == 2001 | year == 2005 | year == 2008 | year == 2011 | year == 2012 | year == 2013 | year == 2014 | year == 2015 | year == 2016)

* gen jobs per capita
gen jobs_pc = jobs/pop*1000
gen FTE_pc = FTE/pop*1000


* gen growth rates
foreach var in FTE_pc jobs_pc FTE jobs FTE_pc_bz jobs_pc_bz FTE_bz jobs_bz {
	bys cant (year): gen `var'_gr = (`var'/`var'[_n-1]-1)
}

* checke whether growth rates between 2005-2008 align well between the two datasets
foreach var in FTE_pc jobs_pc FTE jobs {
	gr tw (scatter `var'_gr `var'_bz_gr) (line `var'_bz_gr `var'_bz_gr) ///
	, ytitle("STATENT") xtitle("BZ") legend(order(1 "`var'")) name(`var', replace)
}
		// this looks very good, growth rates are on the 45 degree line
		
		
* genereate a series that combines STATENT data with growth rates from BZ data
foreach var in FTE_pc jobs_pc FTE jobs {
bys cant (year): replace `var' = `var'[_n+1] / (1 + `var'_bz_gr[_n+1]) if	`var' == . & `var'_bz_gr[_n+1] !=.
bys cant (year): replace `var' = `var'[_n+1] / (1 + `var'_bz_gr[_n+1]) if	`var' == . & `var'_bz_gr[_n+1] !=.
}

* tidy up
replace pop = pop_ if pop == .
drop pop_ 
replace Quelle = "STATENT-BZ growth" if Quelle == ""
drop kanton quelle *_bz *_gr _merge
order cant year jobs FTE pop jobs_pc FTE_pc Quelle

* gen shares
sort year cant
bysort year (cant) : gen sjobs = jobs / jobs[1]
bysort year (cant) : gen sFTE = FTE / FTE[1]
sort cant year


* label everything:
label var cant "Canton"
label var year "Year"
label var jobs "# jobs"
label var FTE "# FTE jobs"
label var pop "Population"
label var sjobs "Cantonal share of jobs"
label var sFTE "Cantonal share of FTE jobs"
label var jobs_pc "# jobs per 1,000 inhabitants"
label var FTE_pc "# FTE jobs per 1,000 inhabitants"
label var Quelle "Data source"
label var Arbeitsstätten "# establishments"

*--------------------------------------------------------------------------------------
* PREPARE DATA FOR EVENT STUDY

* Gen canton treatment dummies
gen treated = (cant == 6)
  label var treated "Treated"
  
gen period = (year > 2005 & year!=.)
  label var period "Period $ t>2005$"

* Gen DiD Interaction term
gen interaction = treated*period
  label var interaction "DiD"


* Gen interaction terms for pre-treatment periods
gen pre3 = 0
replace pre3 = 1 if treated == 1 & year == 1995
gen pre2 = 0
replace pre2 = 1 if treated == 1 & year == 2001
gen pre1 = 0 // make 2005 the reference year


* Gen interaction terms for post-treatment periods
gen post1 = 0
replace post1 = 1 if treated == 1 & year == 2008

forval n=2/7 {
local i = 2009 + `n'
gen post`n' = 0
replace post`n' = 1 if treated == 1 & year == `i'
label var post`n' "`i' (treatment lead `n')"
}

* Gen canton-specific time trends
	tab cant, gen (cant_d)
	// adjust names for correct canton id
	local i = 0
	forval n = 0/26 {
		local i = `i'+1
		rename cant_d`i' cant_d`n'
	}
	
	foreach var of varlist cant_d* {
	gen trend_`var' = year*`var'
	label var trend_`var' "Canton specific time trend"
	}
		
	// overall trend
	gen trend = year

* * *   * * *   * * *   * * *   * * *   * * *
*  RESIDUALIZED  TIME TREND ESTIMATION
* * *   * * *   * * *   * * *   * * *   * * *
global outcomes "jobs FTE jobs_pc FTE_pc"

* Estimate a time trend for each  outcome	 
cd "$mypathRR/Results"
	
* estimate & predict canton trend
	preserve
		foreach outcome in $outcomes  {
			reg `outcome' trend_* cant_d* if year < 2006 & year > 1994 
			predict tr`outcome', xb
			label var tr`outcome' "predicted cantonal time trend in `outcome'"
		}
		
		keep  cant year tr* 
		drop trend* treated
		bys cant year: keep if _n==1
	save "est_canton_trends.dta", replace
	restore 	 
	drop cant_d*

* match trend back to original data
	merge m:1 year cant using "est_canton_trends.dta", nogen keep(1 3)
	rm "est_canton_trends.dta"

	
* generate residualized outcomes
foreach outcome in $outcomes {
	reg `outcome' tr`outcome'
	predict resid`outcome', residual
	label var resid`outcome' "residualized `outcome' (after taking out canton trend)"
	
	gen detrend`outcome' = `outcome' - tr`outcome'
	label var detrend`outcome' "detrended `outcome' (after taking out canton trend)"
}
/*
* check residualized outcomes
foreach outcome in $outcomes  {
scatter resid`outcome' detrend`outcome' if cant == 6 || ///
lfit resid`outcome' detrend`outcome' if cant == 6 		///
, xlab(-30(10)80, grid) ylab(-30(10)80, grid) 			///
name(resid_pretrend_`outcome', replace)
}

foreach outcome in $outcomes  {
scatter `outcome' year if cant == 6 || 					///
lfit `outcome' year if cant == 6 & year < 2006 || 		///
lfit `outcome' year if cant == 6 & year > 2005  		///
, name(trend_`outcome', replace)

scatter resid`outcome' year if cant == 6 || 			///
lfit resid`outcome' year if cant == 6 & year < 2006 || 	///
lfit resid`outcome' year if cant == 6 & year > 2005 	///
, name(residualized_`outcome', replace)

scatter detrend`outcome' year if cant == 6 || 				///
lfit detrend`outcome' year if cant == 6 & year < 2006 ||	///
lfit detrend`outcome' year if cant == 6 & year > 2005  		///
, name(detrended_`outcome', replace)
}
// residualizing may not work well for these outcomes bc of break in series 2005,
// detrending may be better  (series are not negative pre 2005)
// parallel trends only in variables per capita
*/
*--------------------------------------------------------------------------------------
*--------------------------------------------------------------------------------------
*--------------------------------------------------------------------------------------
* EVENT STUDY GRAPHS with xtevent

cd "$mypathRR/Results/"
egen periods = group(year)

xtset cant periods

gen policyvar = (cant == 6 & year>2005)

drop if cant ==0

global outcomespaper "jobs FTE jobs_pc FTE_pc"


// estimate the models
foreach outcome in $outcomespaper {
	
	/*
	// save pre-reform levels to compute percentage changes - not possible, due to break in series and hence differences in levels
	sum `outcome' if cant == 6 & year == 2005
	global `outcome'2005 = `r(max)'
	*/
	// diff-in-diff 1: residualized outcome
	reghdfe detrend`outcome' interaction treated period if cant > 0 & cant <27, vce(cluster cant year) absorb(year cant)
	global ES_b_resid`outcome'=_b[interaction]
	global ES_se_resid`outcome'=_se[interaction]

	
	* xtevent model 1: pre-trend correction
	xtevent `outcome' if cant > 0 & cant <27, policyvar(policyvar) 	///
	window(-2 4) trend(-2) 											///
	reghdfe vce(cluster cant periods) 
	est sto `outcome'_pretrend_II
	
	xteventplot, name(comp_`outcome'II, replace) nosupt minus1label 		///
	ytitle("`: variable label `outcome''") 										///
	ylabel(`myla', labsize(small) grid) 										///
	xlab( , labsize(small) ang(45)) 				///
	xline(-0.2, lcolor(red)) xline(1.8, lcolor(green)) ///
	ciplotopts(recast(rarea) color(maroon*0.2) color(maroon%10))


	// diff-in-diff 2: no pre-trend correction
	reghdfe `outcome' interaction treated period if cant > 0 & cant <27 , vce(cluster cant year) absorb(year cant)
	global ES_b_`outcome'=_b[interaction]
	global ES_se_`outcome'=_se[interaction]


	* xtevent model 2: no pre-trend correction
	xtevent `outcome' if cant > 0 & cant <27, policyvar(policyvar) 	///
	window(-2 4)   													///
	reghdfe vce(cluster cant periods)  
	est sto `outcome'

	xteventplot, name(comp_`outcome', replace) nosupt minus1label  			///
	ytitle("`: variable label `outcome''") 										///
	ylabel(`myla', labsize(small) grid) 										///
	xlab(, labsize(small) ang(45)) 				///
	xline(-0.2, lcolor(red)) xline(1.8, lcolor(green)) ///
	ciplotopts(recast(rarea) color(maroon*0.2) color(maroon%10))
}



// store results in matrix for plotting
 foreach OUT in jobs_pc FTE_pc { 
		estimates restore `OUT'_pretrend_II
 
		* manually calculate confidence intervals from stored results
		loc df = e(df)
		di `df'
		tempname b V
		mat `b' = e(delta)
		mat `V' = e(Vdelta)
		tempvar se	
		
		* standard erors
	    mata st_matrix("`se'", sqrt( diagonal(st_matrix("`V'"))) ')	

		mat list `b'
		mat list `se'
		
		* confidence intervals
		forval n = 1/7 {
		loc ll`n' = `b'[1,`n'] - invttail(`df',0.5*0.05) * `se'[1,`n'] 
		loc ul`n' = `b'[1,`n'] + invttail(`df',0.5*0.05) * `se'[1,`n'] 
		}

		* create a new matrix to put the results
		matrix A`OUT' = J(3,9,.)
        matrix rownames A`OUT' = est ll95 ul95
        matrix colnames A`OUT' = _k_eq_m3 _k_eq_m2 _k_eq_m1 _k_eq_p0 _k_eq_p1 _k_eq_p2 _k_eq_p3 _k_eq_p4 _k_eq_p5
		mat list A`OUT'
		
		* put the results from xtevent into the new matrix
		forval n = 1/1 {
		loc i = `n'
		matrix A`OUT'[1,`n'] = `b'[1,`i'] \ `ll`i'' \ `ul`i''
		}
	 	matrix A`OUT'[1,2] = 0 \ 0 \ 0
	 	matrix A`OUT'[1,3] = 0 \ 0 \ 0
		forval n = 4/9 {
		loc i = `n'-2
		matrix A`OUT'[1,`n'] = `b'[1,`i'] \ `ll`i'' \ `ul`i''
		}
		
		* compare results stored in new matrix with output from xtevent
		mat list A`OUT'
		mat list `se'
		estimates replay `OUT'_pretrend_II
	 }
	 

		
	coefplot (matrix(Ajobs_pc), ci((2 3)) label(" ") omitted color(*0.7) ciopts(recast(rcap) color(*0.7) ))  ///
			 (matrix(AFTE_pc),  ci((2 3)) label(" ") omitted  /*ciopts(recast(rarea) color(navy*0.2) color(navy%10) )*/ ) ///
				,  ///
				coeflabels( _k_eq_m3 = "1995" _k_eq_m2 = "2001" _k_eq_m1 = "2005" 			///
				_k_eq_p0 = "2008" _k_eq_p1 = "2011" _k_eq_p2 = "2012" 						///
				_k_eq_p3 = "2013" _k_eq_p4 = "2014" _k_eq_p5 = "2015+") 					///
				vertical levels(95) ciopts(recast(rcap)) connect(direct) 		 			///
				relocate(_k_eq_m3 = -10 _k_eq_m2 = -5 _k_eq_m1 = -1 _k_eq_p0 = 2 _k_eq_p1 = 5 _k_eq_p2 = 6 _k_eq_p3 = 7 _k_eq_p4 = 8 _k_eq_p5 = 9) ///
				yline(0, lcolor(black)) 													///
				xline(0.1, lcolor(red)) xline(1.7, lcolor(green)) 							///
				xlabel(, labsize(small) ang(45)) ylabel(-20(10)42, labsize(small) grid) 	///
				ytitle("No. of jobs per 1,000 inhabitants") name(pc_trend, replace) 		///
					legend(order(2 "Total jobs" /* - "DiD-estimate: {&beta} = `:di %3.2f ${ES_b_jobs_pc}' (`:di %3.2f ${ES_se_jobs_pc}')" */						///
					4 "FTE" /*- "DiD-estimate: {&beta} = `:di %3.2f ${ES_b_FTE_pc}' (`:di %3.2f ${ES_se_FTE_pc}')" */ ) 											///
					row(2) ring(0) position(11) bmargin(small) size(small))

		
		

// store results in matrix for plotting
 foreach OUT in jobs_pc FTE_pc { 
		estimates restore `OUT'
		
		* manually calculate confidence intervals from stored results
		loc df = e(df)
		di `df'
		tempname b V
		mat `b' = e(delta)
		mat `V' = e(Vdelta)
		tempvar se
		
		* standard erors
	    mata st_matrix("`se'", sqrt( diagonal(st_matrix("`V'"))) ')	

		mat list `b'
		mat list `se'
		
		* confidence intervals
		forval n = 1/16 {
		loc ll`n' = `b'[1,`n'] - invttail(`df',0.5*0.05) * `se'[1,`n'] 
		loc ul`n' = `b'[1,`n'] + invttail(`df',0.5*0.05) * `se'[1,`n'] 
		}

		* create a new matrix to put the results
		matrix B`OUT' = J(3,9,.)
        matrix rownames B`OUT' = est ll95 ul95
        matrix colnames B`OUT' = _k_eq_m3 _k_eq_m2 _k_eq_m1 _k_eq_p0 _k_eq_p1 _k_eq_p2 _k_eq_p3 _k_eq_p4 _k_eq_p5
		mat list B`OUT'
		
		* put the results from xtevent into the new matrix
		forval n = 1/2 {
		loc i = `n'
		matrix B`OUT'[1,`n'] = `b'[1,`i'] \ `ll`i'' \ `ul`i''
		}
	 	matrix B`OUT'[1,3] = 0 \ 0 \ 0
		forval n = 4/9 {
		loc i = `n'-1
		matrix B`OUT'[1,`n'] = `b'[1,`i'] \ `ll`i'' \ `ul`i''
		}
		
		* compare results stored in new matrix with output from xtevent
		mat list B`OUT'
		mat list `se'
		estimates replay `OUT'
	 }
		
	coefplot (matrix(Bjobs_pc), ci((2 3)) label(" ") omitted color(*0.7) ciopts(recast(rcap) color(*0.7) ))  ///
			 (matrix(BFTE_pc),  ci((2 3)) label(" ") omitted  /*ciopts(recast(rarea) color(navy*0.2) color(navy%10) )*/ ) ///
				,  ///
				coeflabels( _k_eq_m3 = "1995" _k_eq_m2 = "2001" _k_eq_m1 = "2005" 			///
				_k_eq_p0 = "2008" _k_eq_p1 = "2011" _k_eq_p2 = "2012" 						///
				_k_eq_p3 = "2013" _k_eq_p4 = "2014" _k_eq_p5 = "2015+") 					///
				vertical levels(95) ciopts(recast(rcap)) connect(direct) 		 			///
				relocate(_k_eq_m3 = -10 _k_eq_m2 = -5 _k_eq_m1 = -1 _k_eq_p0 = 2 _k_eq_p1 = 5 _k_eq_p2 = 6 _k_eq_p3 = 7 _k_eq_p4 = 8 _k_eq_p5 = 9) ///
				yline(0, lcolor(black)) 													///
				xline(0.1, lcolor(red)) xline(1.7, lcolor(green)) 							///
				xlabel(, labsize(small) ang(45)) ylabel(-20(10)42, labsize(small) grid) 	///
				ytitle("No. of jobs per 1,000 inhabitants") name(pc_notrend, replace) 		///
					legend(order(2 "Total jobs" - "DiD-estimate: {&beta} = `:di %3.2f ${ES_b_jobs_pc}' (`:di %3.2f ${ES_se_jobs_pc}')" 						///
					4 "FTE"- "DiD-estimate: {&beta} = `:di %3.2f ${ES_b_FTE_pc}' (`:di %3.2f ${ES_se_FTE_pc}')" ) 											///
					row(2) ring(0) position(11) bmargin(small) size(small))
		graph export "Fig_10)-jobs+FTE_pc.pdf", as(pdf)	replace



								* * * * *  E N D  * * * * * * 
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
